avg.boot.HC <- bnlearn::averaged.network(str_boot_HC, threshold = 0.95)
plot.network(avg.boot.HC, ht = "900px")plot(str_boot_HC)
abline(v = 0.54, col = "tomato", lty = 2, lwd = 2)
abline(v = 0.70, col = "steelblue", lty = 2, lwd = 2)
abline(v = 0.88, col = "steelblue", lty = 2, lwd = 2)
abline(v = 0.95, col = "orange", lty = 2, lwd = 2)outcomes_vars <- c("c_asthma", "w_asthma","trt_copd","noalle_asthma","alle_asthma","any_smp","only_sysmptomps")
#socioeconomic_vars <- colnames(df_all %>% select(matches("IgE_")))
socioeconomic_vars <- c("edu_credits","sei_class","syk_class")
smoking_vars <- c("smoking_status","duration", "startage")
confounders_vars <- c("age","BMI","gender","trt_bp","trt_sleep","trt_diabetes","hereditery_asthma","hereditery_allergy","smoke_expwork")
# get the names of all the variables from the dataframe we used to learn the network
# see the boot_bn_learn r code file
#data_modeling <- mice::complete(loaded_mice_object, 20)
#data_modeling <- Prepare_data_bn(raw_correlated_data = raw_data, imputed_data = data_modeling)
aux_variables <- setdiff(colnames(data_modeling), c(outcomes_vars, socioeconomic_vars, confounders_vars, smoking_vars))# choose a less dense and simpler network
BN_threshold = 0.95
#avg.simpler_mice_hc = averaged.network(str_boot_HC, threshold = BN_threshold)
#avg.simpler_mice_hc = cextend(avg.simpler_mice_hc)
# decide the width of the edges
# sometimes when the resulted neetwork has issue with cycle ignored then the averaege.network function above will fix it while str.width is not
# this will result in error when creating the network as the edge dataframe will not match
str.width <- str_boot_HC %>% dplyr::filter(strength > BN_threshold & direction >= 0.5)
nodes.uniq <- unique(c(avg.simpler_mice_hc$arcs[,1], avg.simpler_mice_hc$arcs[,2]))
nodes <- data.frame(id = nodes.uniq,
label = nodes.uniq,
#color = "darkturquoise",
shadow = TRUE#, group = c("CL","CO", "O","E","E","CO","CO","E","O","E","E","E","CO","E","E","E","CO","E","E","CO","E","CO"))
)
nodes <- nodes %>% mutate(group = case_when( (label %in% smoking_vars) ~ "Smoking", (label %in% socioeconomic_vars) ~ "SocioEconomic", (label %in% outcomes_vars) ~"Outcomes", (label %in% confounders_vars) ~ "Confounders", (label %in% aux_variables) ~ "auxiliary" ))
#group = c("CL","CO", "O","E","E","CO","CO","E","O","E","E","E","CO","E","E","E","CO","E","E","CO","E","CO","CO","CO")
edges <- data.frame(from = avg.simpler_mice_hc$arcs[,1],
to = avg.simpler_mice_hc$arcs[,2],
arrows = "to",
smooth = TRUE,
shadow = TRUE,
#width=str.df_all$strength,
value=str.width$strength/10,
color = "black")visNetwork(nodes, edges, width = "100%", height = "700px") %>% visIgraphLayout() %>%
# darkblue square with shadow for group "A" #visGroups(groupname = "E", color = "darkblue",
# shadow = list(enabled = TRUE)) %>% red triangle for group "B" visGroups(groupname = "CO", color = "red") %>% # see the visnetwork web page
visOptions(highlightNearest = list(enabled = T, degree = 1, hover = F), selectedBy = "group",collapse=TRUE) %>% visLayout(randomSeed = 100) %>% visPhysics(stabilization = FALSE) edges <- data.frame(from = avg.simpler_mice_hc$arcs[,1],
to = avg.simpler_mice_hc$arcs[,2],
arrows = "to",
smooth = TRUE,
shadow = TRUE,
#width=str.df_all$strength,
#value=str.width$strength/10,
color = "black")
visNetwork(nodes, edges, width = "100%",height = "900px") %>%
visIgraphLayout() %>%
visNodes(
shape = "dot",
color = list(
background = "#0085AF",
border = "#013848",
highlight = "#FF8000"
),
shadow = list(enabled = TRUE, size = 10)
) %>%
visEdges(
shadow = FALSE,
color = list(color = "#0085AF", highlight = "#C62F4B")
) %>%
visOptions(highlightNearest = list(enabled = T, degree = 1, hover = T),
selectedBy = "group") %>%
visLayout(randomSeed = 11)tar_load(cv.bn_hc)
cv.bn_hc
k-fold cross-validation for Bayesian networks
target network structure:
[hereditery_asthma][smoking_status][jabstatus|smoking_status]
[e_amount|smoking_status][startage|smoking_status][sei_class|jabstatus]
[duration|jabstatus:smoking_status][education|jabstatus:sei_class]
[trt_bp|jabstatus:education][syk_class|education:sei_class]
[gender|sei_class:syk_class][trt_sleep|jabstatus:gender:trt_bp]
[trt_diabetes|trt_bp:trt_sleep][asthma_tm|hereditery_asthma:trt_sleep]
[hereditery_allergy|hereditery_asthma:education:asthma_tm]
[any_smp|hereditery_allergy:smoking_status:asthma_tm]
[herditery_pulldis|hereditery_asthma:asthma_tm:any_smp]
[DGF_work|gender:syk_class:any_smp][BMI|gender:trt_bp:any_smp]
[c_asthma|hereditery_asthma:asthma_tm:any_smp][cbc|any_smp]
[w_asthma|asthma_tm:any_smp][age|jabstatus:trt_bp:BMI]
[smoke_expwork|DGF_work][her_dis|hereditery_asthma:herditery_pulldis]
[copd|cbc][only_symptoms|c_asthma:any_smp]
[noalle_asthma|hereditery_allergy:c_asthma]
[alle_asthma|hereditery_allergy:c_asthma][smoke_exphome|smoke_expwork]
number of folds: 10
loss function:
Classification Error (Posterior, cond. Gauss.)
training node: c_asthma
number of runs: 40
average loss over the runs: 0.0003323889
standard deviation of the loss: 8.268791e-08
effe_modif_vars <- colnames(data_modeling %>% select(sei_class, smoking_status))
prop_sei <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars, outcome = "c_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# Add the names
prop_sei <- prop_sei %>% mutate(seiClass_new = case_when(sei_class == 0 ~ "Professionals and executives", sei_class == 1 ~ "Manual work in industry", sei_class == 2 ~ "Manual work in service", sei_class == 3 ~ "Assistant Non-manual employees", sei_class == 4 ~ "Intermediate Non-manual employees", sei_class == 5 ~ "Self-employed Non-professionals", sei_class == 6 ~ "students and housewives", sei_class == 7 ~ "unclassified")) %>% mutate(smoking_listNew = case_when(smoking_status == 0 ~ "non_smoker", smoking_status == 1 ~ "formerSmoker", smoking_status == 2 ~ "currentSmoker"))
#library(plotly)
# plot the results
prop_sei_p <- ggplot(prop_sei, aes(x = seiClass_new, y = prob)) + geom_errorbar( aes(ymin = q05, ymax = q975, color = smoking_listNew), position = position_dodge(0.3), width = 0.2) + geom_point(aes(color = smoking_listNew), position = position_dodge(0.3)) + scale_color_manual(values = c("#00AFBB", "#E7B800",'#999999')) + theme_classic() + scale_x_discrete(labels = function(x) {stringr::str_wrap(x, width = 8)})
prop_sei_p#ggplotly(prop_sei_p)
#subplot(prop_sei_p, prop_sei_p)effe_modif_vars <- colnames(data_modeling %>% select(sei_class, smoking_status))
prop_sei_w <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars, outcome = "w_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# Add the names
prop_sei_w <- prop_sei_w %>% mutate(seiClass_new = case_when(sei_class == 0 ~ "Professionals and executives", sei_class == 1 ~ "Manual work in industry", sei_class == 2 ~ "Manual work in service", sei_class == 3 ~ "Assistant Non-manual employees", sei_class == 4 ~ "Intermediate Non-manual employees", sei_class == 5 ~ "Self-employed Non-professionals", sei_class == 6 ~ "students and housewives", sei_class == 7 ~ "unclassified")) %>% mutate(smoking_listNew = case_when(smoking_status == 0 ~ "non_smoker", smoking_status == 1 ~ "formerSmoker", smoking_status == 2 ~ "currentSmoker"))
# plot the results
prop_sei_w_p <- ggplot(prop_sei_w, aes(x = seiClass_new, y = prob)) + geom_errorbar( aes(ymin = q05, ymax = q975, color = smoking_listNew), position = position_dodge(0.3), width = 0.2) + geom_point(aes(color = smoking_listNew), position = position_dodge(0.3)) + scale_color_manual(values = c("#00AFBB", "#E7B800",'#999999')) + theme_classic() + scale_x_discrete(labels = function(x) {stringr::str_wrap(x, width = 8)})
prop_sei_w_pprop_sei_as <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars, outcome = "any_smp", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# Add the names
prop_sei_as <- prop_sei_as %>% mutate(seiClass_new = case_when(sei_class == 0 ~ "Professionals and executives", sei_class == 1 ~ "Manual work in industry", sei_class == 2 ~ "Manual work in service", sei_class == 3 ~ "Assistant Non-manual employees", sei_class == 4 ~ "Intermediate Non-manual employees", sei_class == 5 ~ "Self-employed Non-professionals", sei_class == 6 ~ "students and housewives", sei_class == 7 ~ "unclassified")) %>% mutate(smoking_listNew = case_when(smoking_status == 0 ~ "non_smoker", smoking_status == 1 ~ "formerSmoker", smoking_status == 2 ~ "currentSmoker"))
# plot the results
prop_sei_as_p <- ggplot(prop_sei_as, aes(x = seiClass_new, y = prob)) + geom_errorbar( aes(ymin = q05, ymax = q975, color = smoking_listNew), position = position_dodge(0.3), width = 0.2) + geom_point(aes(color = smoking_listNew), position = position_dodge(0.3)) + scale_color_manual(values = c("#00AFBB", "#E7B800",'#999999')) + theme_classic() + scale_x_discrete(labels = function(x) {stringr::str_wrap(x, width = 8)})
prop_sei_as_pprop_sei_aas <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars, outcome = "alle_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# Add the names
prop_sei_aas <- prop_sei_aas %>% mutate(seiClass_new = case_when(sei_class == 0 ~ "Professionals and executives", sei_class == 1 ~ "Manual work in industry", sei_class == 2 ~ "Manual work in service", sei_class == 3 ~ "Assistant Non-manual employees", sei_class == 4 ~ "Intermediate Non-manual employees", sei_class == 5 ~ "Self-employed Non-professionals", sei_class == 6 ~ "students and housewives", sei_class == 7 ~ "unclassified")) %>% mutate(smoking_listNew = case_when(smoking_status == 0 ~ "non_smoker", smoking_status == 1 ~ "formerSmoker", smoking_status == 2 ~ "currentSmoker"))
# plot the results
prop_sei_aas_p <- ggplot(prop_sei_aas, aes(x = seiClass_new, y = prob)) + geom_errorbar( aes(ymin = q05, ymax = q975, color = smoking_listNew), position = position_dodge(0.3), width = 0.2) + geom_point(aes(color = smoking_listNew), position = position_dodge(0.3)) + scale_color_manual(values = c("#00AFBB", "#E7B800",'#999999')) + theme_classic() + scale_x_discrete(labels = function(x) {stringr::str_wrap(x, width = 8)})
prop_sei_aas_pprop_sei_naas <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars, outcome = "noalle_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# Add the names
prop_sei_naas <- prop_sei_naas %>% mutate(seiClass_new = case_when(sei_class == 0 ~ "Professionals and executives", sei_class == 1 ~ "Manual work in industry", sei_class == 2 ~ "Manual work in service", sei_class == 3 ~ "Assistant Non-manual employees", sei_class == 4 ~ "Intermediate Non-manual employees", sei_class == 5 ~ "Self-employed Non-professionals", sei_class == 6 ~ "students and housewives", sei_class == 7 ~ "unclassified")) %>% mutate(smoking_listNew = case_when(smoking_status == 0 ~ "non_smoker", smoking_status == 1 ~ "formerSmoker", smoking_status == 2 ~ "currentSmoker"))
# plot the results
prop_sei_naas_p <- ggplot(prop_sei_naas, aes(x = seiClass_new, y = prob)) + geom_errorbar( aes(ymin = q05, ymax = q975, color = smoking_listNew), position = position_dodge(0.3), width = 0.2) + geom_point(aes(color = smoking_listNew), position = position_dodge(0.3)) + scale_color_manual(values = c("#00AFBB", "#E7B800",'#999999')) + theme_classic() + scale_x_discrete(labels = function(x) {stringr::str_wrap(x, width = 8)})
prop_sei_naas_peffe_modif_vars_edu <- colnames(data_modeling %>% select(education, smoking_status))
# get the cp
prop_edu <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_edu, outcome = "c_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# get the plot
prop_edu_p <- get_cpq_plot(res_data = prop_edu, effe_modif_vars = effe_modif_vars_edu, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_edu_p[[2]]prop_edu_w <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_edu, outcome = "w_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# get the plot
prop_edu_w_p <- get_cpq_plot(res_data = prop_edu_w, effe_modif_vars = effe_modif_vars_edu, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_edu_w_p[[2]]prop_edu_as <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_edu, outcome = "any_smp", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# get the plot
prop_syk_as_p <- get_cpq_plot(res_data = prop_edu_as, effe_modif_vars = effe_modif_vars_edu, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_syk_as_p[[2]]prop_edu_aas <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_edu, outcome = "alle_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
prop_edu_aas_p <- get_cpq_plot(res_data = prop_edu_aas, effe_modif_vars = effe_modif_vars_edu, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_edu_aas_p[[2]]prop_edu_naas <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_edu, outcome = "noalle_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
prop_edu_naas_p <- get_cpq_plot(res_data = prop_edu_naas, effe_modif_vars = effe_modif_vars_edu, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_edu_naas_p[[2]]effe_modif_vars_syk <- colnames(data_modeling %>% select(syk_class, smoking_status))
# get the cp
prop_syk <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_syk, outcome = "c_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# get the plot
prop_syk_p <- get_cpq_plot(res_data = prop_syk, effe_modif_vars = effe_modif_vars_syk, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_syk_p[[2]]prop_syk_w <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_syk, outcome = "w_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# get the plot
prop_syk_w_p <- get_cpq_plot(res_data = prop_syk_w, effe_modif_vars = effe_modif_vars_syk, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_syk_w_p[[2]]prop_syk_as <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_syk, outcome = "any_smp", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
# get the plot
prop_syk_as_p <- get_cpq_plot(res_data = prop_syk_as, effe_modif_vars = effe_modif_vars_syk, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_syk_as_p[[2]]prop_syk_aas <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_syk, outcome = "alle_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
prop_syk_aas_p <- get_cpq_plot(res_data = prop_syk_aas, effe_modif_vars = effe_modif_vars_syk, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_syk_aas_p[[2]]prop_syk_naas <- cpq_effe_modif(.data = data_modeling, vars = effe_modif_vars_syk, outcome = "noalle_asthma", state = "1", model = avg.simpler_mice_hc, repeats = 200000)
prop_syk_naas_p <- get_cpq_plot(res_data = prop_syk_naas, effe_modif_vars = effe_modif_vars_syk, original_raw_data = as.data.frame(raw_data), final_data = data_modeling)
prop_syk_naas_p[[2]]